#!/usr/bin/python
"""
%InsertOptionParserUsage%


@author: 	Arthur Kantor
@contact: 	akantorREMOVE_THIS@uiuc.edu
@copyright:	Arthur Kantor 2008
@license: 	GPL version 3
@date: 		6/4/2009
@version: 	0.9

"""

import sys, re, os;
import logging
import time
import argparse
import textwrap
from gmtkParam import *
from util import *
from util.UniqueList import *
import copy



def makeParser():
	"""Build the command-line parser for genRightContextDT.py.

	The parser is also used at import time to generate the usage text that is
	spliced into the module docstring.

	@return: an argparse.ArgumentParser with the required -p, -n and -r file
		options and the optional -v verbosity option.
	"""
	usage = """
genRightContextDT.py [--help] [options] 
Takes a phoneIndexWordToPhoneDt, and creates the phoneIndex_word_2_nextPhone DT pointing to the  phone after 
the one specified by the index.
For now contexts can only be phonemes
The indexes are assigned to contexts according to the orderNameCollectionFile.
"""
	def readableFile(fname):
		"""argparse type callable: accept fname only if it can be read."""
		if not os.access(fname, os.R_OK):
			# ArgumentTypeError makes argparse report this exact message in its
			# usual usage error, instead of a generic "invalid value" one.
			raise argparse.ArgumentTypeError("cannot read file %s" % fname)
		return fname

	parser = argparse.ArgumentParser(
		prog='genRightContextDT.py',
		description=textwrap.dedent(usage),
		formatter_class=argparse.RawDescriptionHelpFormatter)

	parser.add_argument(
		"-p", "--phoneIndexWordToPhoneDt", type=readableFile, metavar="FILE", required=True,
		help="reads the phoneIndex_word_2_phone DT from FILE and uses to make the right context DT.")

	parser.add_argument(
		"-n", "--orderNameCollectionFile", type=readableFile, metavar="FILE", required=True,
		help="Context indexes are assigned by iterating through the units in the order listed in orderNameCollectionFile.  The names should be in the gm:featureKind:unit:subUnitState format.")

	parser.add_argument(
		'-r', '--rightContextDT', type=str, metavar="FILE", required=True,
		help="write the phoneIndex_word_2_nextPhone DT to FILE.")

	# verbosity v is turned into logging level 51-v by main(), so the default
	# below corresponds to logging.INFO
	parser.add_argument(
		"-v", "--verbosity", type=int, metavar="INT", default=51-logging.INFO,
		help="Prints debug info to STDOUT ranges from 1 (critical) to 50 (everything) default: %(default)s")
	return parser
	
def main(argv):
	"""Run the tool: read both input files and write the right-context DT.

	@param argv: the full argument vector with the program name first
		(normally sys.argv); argv[1:] is parsed with the module-level parser.
	"""
	cmd = ' '.join(argv)
	# parse the arguments we were handed rather than implicitly re-reading sys.argv
	opts = parser.parse_args(argv[1:])

	#set up logging
	logging.basicConfig(stream=sys.stdout, format='%(levelname)s %(message)s', level=51-opts.verbosity)
	logging.info('Program started on %s as %s', time.ctime(), cmd)

	ws = Workspace()
	ws.readFromFile(NameCollection, opts.orderNameCollectionFile)
	logging.info('read NameCollection %s', opts.orderNameCollectionFile)
	orderColl = list(ws[NameCollection].values())[0] #there is only one value
	# each name is split on ':'; the first two fields are dropped, and the unit
	# is whatever precedes the next ':' in the remainder
	subUnitNames = [name.split(':', 2)[2] for name in orderColl]
	unitNames = UniqueList([sub.split(':')[0] for sub in subUnitNames])

	logging.info('reading %s for generating right contexts', opts.phoneIndexWordToPhoneDt)
	dts = DTs(Workspace(), opts.phoneIndexWordToPhoneDt)
	piTopDT = dts.trees[0]
	logging.info('generating  contexts')
	dtWs = genRightContextDT(piTopDT, unitNames)
	logging.info('writing contexts to %s', opts.rightContextDT)
	dto = WorkspaceIO(open(opts.rightContextDT, 'w'))
	try:
		dtWs.writeToIO(DT, dto)
	finally:
		# close the output even if writing fails part-way
		dto.close()


def genRightContextDT(piTopDT,unitNames):
	"""Rewrite a phoneIndex-to-phone tree so each phone index yields the next phone.

	The tree held by piTopDT is modified in place and then wrapped in a DT
	named 'phoneIndex_word_2_nextPhone' that is created in a fresh Workspace.

	@param piTopDT: object whose .tree is the tree to rewrite; every child of
		that tree (including the default one) is processed.
	@param unitNames: ordered unit names.  Names that start and end with '.'
		are treated as compound units whose left phone is the first
		'_'-separated component; all other names are treated as simple
		phonemes, which become the contexts.
	@return: the new Workspace in which the DT was created.
	@raise ValueError: if a simple phoneme's position in unitNames differs from
		its context index, or if a leaf's comment names a phoneme whose context
		index differs from the leaf's value.
	@raise KeyError: if a left phone or a leaf comment is not a known context.
	"""
	def isCompound(name):
		return name.startswith('.') and name.endswith('.')

	contexts = [u for u in unitNames if not isCompound(u)]
	contextToId = {c: i for (i, c) in enumerate(contexts)}

	# unit index -> replacement branch that always answers with the unit's left phone
	unitToLeftPhoneMap = {}
	for i, u in enumerate(unitNames):
		if isCompound(u):
			unit = u[1:-1].split('.', 1)[0]
			leftPhone = unit.split('_')[0]
			l = TreeLeaf(contextToId[leftPhone])
			unitToLeftPhoneMap[i] = TreeBranch(-1, l, '%s ==> %s' % (u, leftPhone))
		elif i != contextToId[u]:
			#assume it's a simple phoneme; its unit index must equal its context index
			msg = 'unit %s (index %d) is a simple phone but maps to index %d.  Right context tree will not be built correctly.' % (u, i, contextToId[u])
			logging.error(msg)
			raise ValueError(msg)

	print ("FIXME! check that the left and right contexts for the non-phone units make sense")
	#build the right tree
	#this is done by taking phoneIndexWord and having each phoneIndex point to the next phone.
	#The last phoneIndex in a word (EOW) also points to EOW, and EOW is the default value.
	rb = piTopDT.tree
	for i in range(rb.numQuestions()+1): #default is the last question
		wordTree = rb[i]
		for j in range(wordTree.numQuestions()-1):
			# shift the following entry into slot j
			wordTree[j] = wordTree[j+1]
			unitId = wordTree[j][0](None)
			if unitId in unitToLeftPhoneMap: #map a unit to a simple phone
				wordTree[j] = unitToLeftPhoneMap[unitId]
			else: #the unit should already be a phone just check correctness via the comment
				phoneName = wordTree[j].comment.strip()
				if contextToId[phoneName] != unitId:
					raise ValueError('Bloody Murder!  phoneme %s apparently has index %d but should have context index %d' % (phoneName, unitId, contextToId[phoneName]))
		# every entry moved up by one, so the last question slot is removed
		del wordTree[wordTree.numQuestions()-1]

	dtWS = Workspace()
	# NOTE(review): the DT is presumably registered in dtWS by its constructor,
	# since only the workspace is returned -- confirm against gmtkParam
	DT(dtWS, 'phoneIndex_word_2_nextPhone', 2, rb)

	return dtWS
	
		
#the parser is used for generating documentation, so create it always, and augment __doc__ with usage info  
#This messes up epydoc a little, but allows us to keep a single version of documentation for all purposes
parser = makeParser()
# __doc__ is None when docstrings are stripped (python -OO); skip the substitution
# then instead of failing at import time
if __doc__:
	__doc__ = __doc__.replace("%InsertOptionParserUsage%\n", parser.format_help())

if __name__ == "__main__":
	main(sys.argv)
	logging.info('Program finished on %s', time.ctime())
